library(partykit)
## Loading required package: grid
## Loading required package: libcoin
## Loading required package: mvtnorm
## Loading required package: rpart
library (dplyr)
## 
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
## 
##     filter, lag
## The following objects are masked from 'package:base':
## 
##     intersect, setdiff, setequal, union
library (ISLR)
library (ROCR)
## Loading required package: gplots
## 
## Attaching package: 'gplots'
## The following object is masked from 'package:stats':
## 
##     lowess
library (rpart)
library (tidyverse)
## ── Attaching packages ────────────────────────────────────────────────────────── tidyverse 1.2.1 ──
## ✔ ggplot2 2.2.1     ✔ readr   1.1.1
## ✔ tibble  1.4.2     ✔ purrr   0.2.4
## ✔ tidyr   0.8.0     ✔ stringr 1.2.0
## ✔ ggplot2 2.2.1     ✔ forcats 0.2.0
## ── Conflicts ───────────────────────────────────────────────────────────── tidyverse_conflicts() ──
## ✖ dplyr::filter() masks stats::filter()
## ✖ dplyr::lag()    masks stats::lag()
library (broom)


# Load the combined admissions extract (admit_data_all_uc.csv). The header row
# supplies the column names, which is read_csv()'s default.
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/admit_data_all_uc.csv"
admit_alldata <- read_csv(file_path)
## Parsed with column specification:
## cols(
##   `School Name` = col_character(),
##   City = col_character(),
##   `County/State/ Territory` = col_character(),
##   `Measure Names` = col_character(),
##   School = col_character(),
##   Race = col_character(),
##   `Measure Values` = col_integer()
## )
# Load the Berkeley extract. In the recorded run readr warns that this file has
# a duplicated `School` header and renames the second occurrence to `School_1`.
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/Berkeley.csv"
Berkeley <- read_csv(file_path)
## Warning: Duplicated column names deduplicated: 'School' => 'School_1' [5]
## Parsed with column specification:
## cols(
##   School = col_character(),
##   City = col_character(),
##   `County/State/ Territory` = col_character(),
##   `Measure Names` = col_character(),
##   School_1 = col_character(),
##   `Uad Uc Ethn 6 Cat` = col_character(),
##   `Measure Values` = col_integer()
## )
# Load the Davis extract; column names come from the header row (the default).
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/Davis.csv"
Davis <- read_csv(file_path)
## Parsed with column specification:
## cols(
##   Calculation1 = col_character(),
##   City = col_character(),
##   `County/State/ Territory` = col_character(),
##   `Measure Names` = col_character(),
##   School = col_character(),
##   `Uad Uc Ethn 6 Cat` = col_character(),
##   `Measure Values` = col_integer()
## )
# Load the Irvine extract; column names come from the header row (the default).
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/Irvine.csv"
Irvine <- read_csv(file_path)
## Parsed with column specification:
## cols(
##   Calculation1 = col_character(),
##   City = col_character(),
##   `County/State/ Territory` = col_character(),
##   `Measure Names` = col_character(),
##   School = col_character(),
##   `Uad Uc Ethn 6 Cat` = col_character(),
##   `Measure Values` = col_integer()
## )
# Load the Los Angeles extract (LosAngeles_all.csv); column names come from the
# header row (the default).
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/LosAngeles_all.csv"
LA_all <- read_csv(file_path)
## Parsed with column specification:
## cols(
##   Calculation1 = col_character(),
##   City = col_character(),
##   `County/State/ Territory` = col_character(),
##   `Measure Names` = col_character(),
##   School = col_character(),
##   `Uad Uc Ethn 6 Cat` = col_character(),
##   `Measure Values` = col_integer()
## )
# Load the Merced extract; column names come from the header row (the default).
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/Merced.csv"
Merced <- read_csv(file_path)
## Parsed with column specification:
## cols(
##   Calculation1 = col_character(),
##   City = col_character(),
##   `County/State/ Territory` = col_character(),
##   `Measure Names` = col_character(),
##   School = col_character(),
##   `Uad Uc Ethn 6 Cat` = col_character(),
##   `Measure Values` = col_integer()
## )
# Load the Riverside extract; column names come from the header row (the
# default).
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/Riverside.csv"
Riverside <- read_csv(file_path)
## Parsed with column specification:
## cols(
##   Calculation1 = col_character(),
##   City = col_character(),
##   `County/State/ Territory` = col_character(),
##   `Measure Names` = col_character(),
##   School = col_character(),
##   `Uad Uc Ethn 6 Cat` = col_character(),
##   `Measure Values` = col_integer()
## )
# Load the San Diego extract; column names come from the header row (the
# default).
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/SanDiego.csv"
SanDiego <- read_csv(file_path)
## Parsed with column specification:
## cols(
##   Calculation1 = col_character(),
##   City = col_character(),
##   `County/State/ Territory` = col_character(),
##   `Measure Names` = col_character(),
##   School = col_character(),
##   `Uad Uc Ethn 6 Cat` = col_character(),
##   `Measure Values` = col_integer()
## )
# Load the Santa Barbara extract; column names come from the header row (the
# default).
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/SantaBarb.csv"
SantaBarb <- read_csv(file_path)
## Parsed with column specification:
## cols(
##   Calculation1 = col_character(),
##   City = col_character(),
##   `County/State/ Territory` = col_character(),
##   `Measure Names` = col_character(),
##   School = col_character(),
##   `Uad Uc Ethn 6 Cat` = col_character(),
##   `Measure Values` = col_integer()
## )
# Load the Santa Cruz extract; column names come from the header row (the
# default).
file_path <- "~/compscix415-DataSciencePrinciples/DataSciencePrinciples/data/SantaCruz.csv"
SantaCruz <- read_csv(file_path)
## Parsed with column specification:
## cols(
##   Calculation1 = col_character(),
##   City = col_character(),
##   `County/State/ Territory` = col_character(),
##   `Measure Names` = col_character(),
##   School = col_character(),
##   `Uad Uc Ethn 6 Cat` = col_character(),
##   `Measure Values` = col_integer()
## )
# Print the combined table as loaded, before any reshaping.
admit_alldata
## # A tibble: 12,876 x 7
##    `School Name`   City   `County/State/ T… `Measure Names` School  Race  
##    <chr>           <chr>  <chr>             <chr>           <chr>   <chr> 
##  1 A B MILLER HIG… Fonta… San Bernardino    enr             A B MI… Domes…
##  2 A B MILLER HIG… Fonta… San Bernardino    adm             A B MI… Domes…
##  3 A B MILLER HIG… Fonta… San Bernardino    app             A B MI… Domes…
##  4 A B MILLER HIG… Fonta… San Bernardino    enr             A B MI… Hispa…
##  5 A B MILLER HIG… Fonta… San Bernardino    adm             A B MI… Hispa…
##  6 A B MILLER HIG… Fonta… San Bernardino    app             A B MI… Hispa…
##  7 A B MILLER HIG… Fonta… San Bernardino    enr             A B MI… All   
##  8 A B MILLER HIG… Fonta… San Bernardino    adm             A B MI… All   
##  9 A B MILLER HIG… Fonta… San Bernardino    app             A B MI… All   
## 10 ABRAHAM LINCOL… Los A… Los Angeles       enr             ABRAHA… Asian 
## # ... with 12,866 more rows, and 1 more variable: `Measure Values` <int>
# Print the Berkeley table as loaded, before any reshaping.
Berkeley
## # A tibble: 534 x 7
##    School City  `County/State/ … `Measure Names` School_1 `Uad Uc Ethn 6 …
##    <chr>  <chr> <chr>            <chr>           <chr>    <chr>           
##  1 ALAME… Alam… Alameda          enr             ALAMEDA… White           
##  2 ALAME… Alam… Alameda          adm             ALAMEDA… White           
##  3 ALAME… Alam… Alameda          app             ALAMEDA… White           
##  4 ALAME… Alam… Alameda          enr             ALAMEDA… Asian           
##  5 ALAME… Alam… Alameda          adm             ALAMEDA… Asian           
##  6 ALAME… Alam… Alameda          app             ALAMEDA… Asian           
##  7 ALAME… Alam… Alameda          enr             ALAMEDA… All             
##  8 ALAME… Alam… Alameda          adm             ALAMEDA… All             
##  9 ALAME… Alam… Alameda          app             ALAMEDA… All             
## 10 ALAME… Alam… Alameda          enr             ALAMEDA… White           
## # ... with 524 more rows, and 1 more variable: `Measure Values` <int>
# Reshape one long-format admissions table into wide format and attach the
# derived columns that every campus table shares.
#
# Arguments:
#   long_tbl     - tibble holding `Measure Names` / `Measure Values` columns.
#   campus_label - value stored in the new `Campus` column.
#   admit_year   - value stored in the new `Year` column (default 2017).
#
# Returns the widened tibble (one column per distinct `Measure Names` value)
# plus `AdmitStat` (adm / app, NA wherever either count is missing), `Campus`
# and `Year`.
#
# NOTE: spread() is kept on purpose; the recorded session uses tidyr 0.8.0,
# which predates pivot_wider().
widen_admissions <- function(long_tbl, campus_label, admit_year = 2017) {
  long_tbl %>%
    spread(key = "Measure Names", value = "Measure Values") %>%
    mutate(AdmitStat = adm / app, Campus = campus_label, Year = admit_year)
}

admit_data <- widen_admissions(admit_alldata, "All")

Berkeley_admit <- widen_admissions(Berkeley, "Berkeley")

Davis_admit <- widen_admissions(Davis, "Davis")

Irvine_admit <- widen_admissions(Irvine, "Irvine")

LA_all_admit <- widen_admissions(LA_all, "LosAng")

Merced_admit <- widen_admissions(Merced, "Merced")

Riverside_admit <- widen_admissions(Riverside, "Riverside")

SanDiego_admit <- widen_admissions(SanDiego, "SanDiego")

SantaBarb_admit <- widen_admissions(SantaBarb, "SantaBarb")

SantaCruz_admit <- widen_admissions(SantaCruz, "SantaCruz")


# Print the reshaped combined table (now carrying AdmitStat, Campus and Year).
admit_data
## # A tibble: 4,292 x 11
##    `School Name`   City   `County/State/ … School  Race    adm   app   enr
##    <chr>           <chr>  <chr>            <chr>   <chr> <int> <int> <int>
##  1 A B MILLER HIG… Fonta… San Bernardino   A B MI… Dome…    28    30    13
##  2 A B MILLER HIG… Fonta… San Bernardino   A B MI… Hisp…    36    52    18
##  3 A B MILLER HIG… Fonta… San Bernardino   A B MI… All      68    88    34
##  4 ABRAHAM LINCOL… Los A… Los Angeles      ABRAHA… Asian    25    36    22
##  5 ABRAHAM LINCOL… Los A… Los Angeles      ABRAHA… Hisp…    10    21     7
##  6 ABRAHAM LINCOL… Los A… Los Angeles      ABRAHA… All      39    61    33
##  7 ABRAHAM LINCOL… San F… San Francisco    ABRAHA… White     7    12     6
##  8 ABRAHAM LINCOL… San F… San Francisco    ABRAHA… Asian   105   193    73
##  9 ABRAHAM LINCOL… San F… San Francisco    ABRAHA… Hisp…    14    24     7
## 10 ABRAHAM LINCOL… San F… San Francisco    ABRAHA… Afri…    NA     6    NA
## # ... with 4,282 more rows, and 3 more variables: AdmitStat <dbl>,
## #   Campus <chr>, Year <dbl>
# Print the reshaped Berkeley table (now carrying AdmitStat, Campus and Year).
Berkeley_admit
## # A tibble: 178 x 11
##    School   City   `County/State/ … School_1  `Uad Uc Ethn 6 …   adm   app
##    <chr>    <chr>  <chr>            <chr>     <chr>            <int> <int>
##  1 ALAMEDA… Alame… Alameda          ALAMEDA … All                  7    19
##  2 ALAMEDA… Alame… Alameda          ALAMEDA … Asian               NA     5
##  3 ALAMEDA… Alame… Alameda          ALAMEDA … White                4     7
##  4 ALAMEDA… Alame… Alameda          ALAMEDA … African American    NA     9
##  5 ALAMEDA… Alame… Alameda          ALAMEDA … All                 29   125
##  6 ALAMEDA… Alame… Alameda          ALAMEDA … Asian               15    77
##  7 ALAMEDA… Alame… Alameda          ALAMEDA … Hispanic/ Latino    NA     5
##  8 ALAMEDA… Alame… Alameda          ALAMEDA … White                9    28
##  9 ALAMEDA… Alame… Alameda          ALAMEDA … All                  5    34
## 10 ALAMEDA… Alame… Alameda          ALAMEDA … Asian                5    24
## # ... with 168 more rows, and 4 more variables: enr <int>,
## #   AdmitStat <dbl>, Campus <chr>, Year <dbl>
# Print the reshaped Davis table (now carrying AdmitStat, Campus and Year).
Davis_admit
## # A tibble: 182 x 11
##    Calculation1 City  `County/State/ … School `Uad Uc Ethn 6 …   adm   app
##    <chr>        <chr> <chr>            <chr>  <chr>            <int> <int>
##  1 ALAMEDA COM… Alam… Alameda          ALAME… All                  5    11
##  2 ALAMEDA COM… Alam… Alameda          ALAME… Asian                3     5
##  3 ALAMEDA HIG… Alam… Alameda          ALAME… African American    NA     9
##  4 ALAMEDA HIG… Alam… Alameda          ALAME… All                 62   157
##  5 ALAMEDA HIG… Alam… Alameda          ALAME… Asian               37    97
##  6 ALAMEDA HIG… Alam… Alameda          ALAME… Hispanic/ Latino     4    11
##  7 ALAMEDA HIG… Alam… Alameda          ALAME… White               17    34
##  8 ALAMEDA SCI… Alam… Alameda          ALAME… All                 13    35
##  9 ALAMEDA SCI… Alam… Alameda          ALAME… Asian               10    24
## 10 ALAMEDA SCI… Alam… Alameda          ALAME… Hispanic/ Latino    NA     5
## # ... with 172 more rows, and 4 more variables: enr <int>,
## #   AdmitStat <dbl>, Campus <chr>, Year <dbl>
# Print the reshaped Irvine table (now carrying AdmitStat, Campus and Year).
Irvine_admit
## # A tibble: 146 x 11
##    Calculation1 City  `County/State/ … School `Uad Uc Ethn 6 …   adm   app
##    <chr>        <chr> <chr>            <chr>  <chr>            <int> <int>
##  1 ALAMEDA COM… Alam… Alameda          ALAME… All                  5     8
##  2 ALAMEDA COM… Alam… Alameda          ALAME… Asian                4     5
##  3 ALAMEDA HIG… Alam… Alameda          ALAME… African American    NA     6
##  4 ALAMEDA HIG… Alam… Alameda          ALAME… All                 54   128
##  5 ALAMEDA HIG… Alam… Alameda          ALAME… Asian               41    95
##  6 ALAMEDA HIG… Alam… Alameda          ALAME… White                5    18
##  7 ALAMEDA SCI… Alam… Alameda          ALAME… All                 12    30
##  8 ALAMEDA SCI… Alam… Alameda          ALAME… Asian               10    22
##  9 ALBANY HIGH… Alba… Alameda          ALBAN… All                 24    68
## 10 ALBANY HIGH… Alba… Alameda          ALBAN… Asian               20    42
## # ... with 136 more rows, and 4 more variables: enr <int>,
## #   AdmitStat <dbl>, Campus <chr>, Year <dbl>
# Print the reshaped Los Angeles table (now carrying AdmitStat, Campus and
# Year).
LA_all_admit
## # A tibble: 3,445 x 11
##    Calculation1 City  `County/State/ … School `Uad Uc Ethn 6 …   adm   app
##    <chr>        <chr> <chr>            <chr>  <chr>            <int> <int>
##  1 A B MILLER … Font… San Bernardino   A B M… Domestic Unknown    NA     9
##  2 A B MILLER … Font… San Bernardino   A B M… Hispanic/ Latino     3    27
##  3 A B MILLER … Font… San Bernardino   A B M… All                  4    39
##  4 ABRAHAM LIN… Los … Los Angeles      ABRAH… Asian                3    26
##  5 ABRAHAM LIN… Los … Los Angeles      ABRAH… Hispanic/ Latino     3    10
##  6 ABRAHAM LIN… Los … Los Angeles      ABRAH… All                  7    39
##  7 ABRAHAM LIN… San … San Francisco    ABRAH… White               NA     9
##  8 ABRAHAM LIN… San … San Francisco    ABRAH… Asian                9   101
##  9 ABRAHAM LIN… San … San Francisco    ABRAH… Hispanic/ Latino    NA     7
## 10 ABRAHAM LIN… San … San Francisco    ABRAH… All                 11   125
## # ... with 3,435 more rows, and 4 more variables: enr <int>,
## #   AdmitStat <dbl>, Campus <chr>, Year <dbl>
# Print the reshaped Merced table (now carrying AdmitStat, Campus and Year).
Merced_admit
## # A tibble: 119 x 11
##    Calculation1 City  `County/State/ … School `Uad Uc Ethn 6 …   adm   app
##    <chr>        <chr> <chr>            <chr>  <chr>            <int> <int>
##  1 ALAMEDA COM… Alam… Alameda          ALAME… All                  5     5
##  2 ALAMEDA HIG… Alam… Alameda          ALAME… All                 41    48
##  3 ALAMEDA HIG… Alam… Alameda          ALAME… Asian               27    33
##  4 ALAMEDA HIG… Alam… Alameda          ALAME… White                9     9
##  5 ALAMEDA SCI… Alam… Alameda          ALAME… All                  7     9
##  6 ALBANY HIGH… Alba… Alameda          ALBAN… All                 12    15
##  7 ALBANY HIGH… Alba… Alameda          ALBAN… Asian                9    10
##  8 AMADOR VALL… Plea… Alameda          AMADO… All                 45    55
##  9 AMADOR VALL… Plea… Alameda          AMADO… Asian               26    32
## 10 AMADOR VALL… Plea… Alameda          AMADO… Hispanic/ Latino     6     7
## # ... with 109 more rows, and 4 more variables: enr <int>,
## #   AdmitStat <dbl>, Campus <chr>, Year <dbl>
# Print the reshaped Riverside table (now carrying AdmitStat, Campus and Year).
Riverside_admit
## # A tibble: 115 x 11
##    Calculation1 City  `County/State/ … School `Uad Uc Ethn 6 …   adm   app
##    <chr>        <chr> <chr>            <chr>  <chr>            <int> <int>
##  1 ALAMEDA HIG… Alam… Alameda          ALAME… All                 55    72
##  2 ALAMEDA HIG… Alam… Alameda          ALAME… Asian               38    51
##  3 ALAMEDA HIG… Alam… Alameda          ALAME… White                9    11
##  4 ALAMEDA SCI… Alam… Alameda          ALAME… All                  5     8
##  5 ALAMEDA SCI… Alam… Alameda          ALAME… Asian                3     5
##  6 ALBANY HIGH… Alba… Alameda          ALBAN… All                 17    23
##  7 ALBANY HIGH… Alba… Alameda          ALBAN… Asian                9    12
##  8 ALBANY HIGH… Alba… Alameda          ALBAN… White                4     6
##  9 AMADOR VALL… Plea… Alameda          AMADO… All                 76    88
## 10 AMADOR VALL… Plea… Alameda          AMADO… Asian               49    55
## # ... with 105 more rows, and 4 more variables: enr <int>,
## #   AdmitStat <dbl>, Campus <chr>, Year <dbl>
# Print the reshaped San Diego table (now carrying AdmitStat, Campus and Year).
SanDiego_admit
## # A tibble: 143 x 11
##    Calculation1 City  `County/State/ … School `Uad Uc Ethn 6 …   adm   app
##    <chr>        <chr> <chr>            <chr>  <chr>            <int> <int>
##  1 ALAMEDA COM… Alam… Alameda          ALAME… All                  5    12
##  2 ALAMEDA HIG… Alam… Alameda          ALAME… African American    NA     7
##  3 ALAMEDA HIG… Alam… Alameda          ALAME… All                 53   148
##  4 ALAMEDA HIG… Alam… Alameda          ALAME… Asian               33    95
##  5 ALAMEDA HIG… Alam… Alameda          ALAME… Domestic Unknown    NA     5
##  6 ALAMEDA HIG… Alam… Alameda          ALAME… Hispanic/ Latino     3     8
##  7 ALAMEDA HIG… Alam… Alameda          ALAME… White               14    29
##  8 ALAMEDA SCI… Alam… Alameda          ALAME… All                  5    28
##  9 ALAMEDA SCI… Alam… Alameda          ALAME… Asian                4    21
## 10 ALBANY HIGH… Alba… Alameda          ALBAN… All                 38    87
## # ... with 133 more rows, and 4 more variables: enr <int>,
## #   AdmitStat <dbl>, Campus <chr>, Year <dbl>
# Print the reshaped Santa Barbara table (now carrying AdmitStat, Campus and
# Year).
SantaBarb_admit
## # A tibble: 149 x 11
##    Calculation1 City  `County/State/ … School `Uad Uc Ethn 6 …   adm   app
##    <chr>        <chr> <chr>            <chr>  <chr>            <int> <int>
##  1 ALAMEDA COM… Alam… Alameda          ALAME… All                  3    13
##  2 ALAMEDA COM… Alam… Alameda          ALAME… White               NA     6
##  3 ALAMEDA HIG… Alam… Alameda          ALAME… African American    NA     8
##  4 ALAMEDA HIG… Alam… Alameda          ALAME… All                 47   144
##  5 ALAMEDA HIG… Alam… Alameda          ALAME… Asian               25    84
##  6 ALAMEDA HIG… Alam… Alameda          ALAME… Domestic Unknown    NA     6
##  7 ALAMEDA HIG… Alam… Alameda          ALAME… Hispanic/ Latino     4     8
##  8 ALAMEDA HIG… Alam… Alameda          ALAME… White               13    34
##  9 ALAMEDA SCI… Alam… Alameda          ALAME… All                  5    21
## 10 ALAMEDA SCI… Alam… Alameda          ALAME… Asian                3    14
## # ... with 139 more rows, and 4 more variables: enr <int>,
## #   AdmitStat <dbl>, Campus <chr>, Year <dbl>
# Print the reshaped Santa Cruz table (now carrying AdmitStat, Campus and
# Year).
SantaCruz_admit
## # A tibble: 167 x 11
##    Calculation1 City  `County/State/ … School `Uad Uc Ethn 6 …   adm   app
##    <chr>        <chr> <chr>            <chr>  <chr>            <int> <int>
##  1 ALAMEDA COM… Alam… Alameda          ALAME… All                  9    18
##  2 ALAMEDA COM… Alam… Alameda          ALAME… Asian               NA     6
##  3 ALAMEDA HIG… Alam… Alameda          ALAME… African American    NA     6
##  4 ALAMEDA HIG… Alam… Alameda          ALAME… All                 74   133
##  5 ALAMEDA HIG… Alam… Alameda          ALAME… Asian               43    80
##  6 ALAMEDA HIG… Alam… Alameda          ALAME… Hispanic/ Latino     4     9
##  7 ALAMEDA HIG… Alam… Alameda          ALAME… White               22    32
##  8 ALAMEDA SCI… Alam… Alameda          ALAME… All                 13    27
##  9 ALAMEDA SCI… Alam… Alameda          ALAME… Asian                9    17
## 10 ALBANY HIGH… Alba… Alameda          ALBAN… All                 69   106
## # ... with 157 more rows, and 4 more variables: enr <int>,
## #   AdmitStat <dbl>, Campus <chr>, Year <dbl>
# Give the school-name and county columns short, syntactic names. Renaming by
# name rather than by position means a change in the CSV layout raises an
# error instead of silently relabelling the wrong column.
admit_data <- admit_data %>%
  rename(
    SchoolID = `School Name`,
    County = `County/State/ Territory`
  )

# Restrict the combined table to schools in Alameda County.
All_campus <- admit_data %>%
  filter(County == "Alameda")
##admit_data[10] <- NULL

All_campus
## # A tibble: 207 x 11
##    SchoolID  City   County School Race    adm   app   enr AdmitStat Campus
##    <chr>     <chr>  <chr>  <chr>  <chr> <int> <int> <int>     <dbl> <chr> 
##  1 ALAMEDA … Alame… Alame… ALAME… White     6     7     6     0.857 All   
##  2 ALAMEDA … Alame… Alame… ALAME… Asian     8     9     3     0.889 All   
##  3 ALAMEDA … Alame… Alame… ALAME… Hisp…     4     5    NA     0.800 All   
##  4 ALAMEDA … Alame… Alame… ALAME… All      21    25    11     0.840 All   
##  5 ALAMEDA … Alame… Alame… ALAME… Dome…     4     6    NA     0.667 All   
##  6 ALAMEDA … Alame… Alame… ALAME… White    37    50    22     0.740 All   
##  7 ALAMEDA … Alame… Alame… ALAME… Asian   100   124    71     0.806 All   
##  8 ALAMEDA … Alame… Alame… ALAME… Hisp…     8    12     5     0.667 All   
##  9 ALAMEDA … Alame… Alame… ALAME… Afri…     8    14     5     0.571 All   
## 10 ALAMEDA … Alame… Alame… ALAME… All     161   210   107     0.767 All   
## # ... with 197 more rows, and 1 more variable: Year <dbl>
# Relabel columns 1, 3, 4 and 5 of the Berkeley table in one assignment.
# Positions are used here because column 4's original name comes from readr's
# de-duplication of the repeated `School` header.
names(Berkeley_admit)[c(1, 3, 4, 5)] <-
  c("SchoolID", "County", "School", "Race")

Berkeley_admit
## # A tibble: 178 x 11
##    SchoolID  City   County School Race    adm   app   enr AdmitStat Campus
##    <chr>     <chr>  <chr>  <chr>  <chr> <int> <int> <int>     <dbl> <chr> 
##  1 ALAMEDA … Alame… Alame… ALAME… All       7    19     5     0.368 Berke…
##  2 ALAMEDA … Alame… Alame… ALAME… Asian    NA     5    NA    NA     Berke…
##  3 ALAMEDA … Alame… Alame… ALAME… White     4     7     4     0.571 Berke…
##  4 ALAMEDA … Alame… Alame… ALAME… Afri…    NA     9    NA    NA     Berke…
##  5 ALAMEDA … Alame… Alame… ALAME… All      29   125    13     0.232 Berke…
##  6 ALAMEDA … Alame… Alame… ALAME… Asian    15    77     8     0.195 Berke…
##  7 ALAMEDA … Alame… Alame… ALAME… Hisp…    NA     5    NA    NA     Berke…
##  8 ALAMEDA … Alame… Alame… ALAME… White     9    28     3     0.321 Berke…
##  9 ALAMEDA … Alame… Alame… ALAME… All       5    34     4     0.147 Berke…
## 10 ALAMEDA … Alame… Alame… ALAME… Asian     5    24     4     0.208 Berke…
## # ... with 168 more rows, and 1 more variable: Year <dbl>
# Standardise the Davis column names to SchoolID / County / School / Race.
# Columns are renamed by name, so a change in the CSV layout raises an error
# instead of silently relabelling the wrong column. `School` already has its
# final name and is left untouched.
Davis_admit <- Davis_admit %>%
  rename(
    SchoolID = Calculation1,
    County = `County/State/ Territory`,
    Race = `Uad Uc Ethn 6 Cat`
  )

Davis_admit
## # A tibble: 182 x 11
##    SchoolID  City   County School Race    adm   app   enr AdmitStat Campus
##    <chr>     <chr>  <chr>  <chr>  <chr> <int> <int> <int>     <dbl> <chr> 
##  1 ALAMEDA … Alame… Alame… ALAME… All       5    11    NA     0.455 Davis 
##  2 ALAMEDA … Alame… Alame… ALAME… Asian     3     5    NA     0.600 Davis 
##  3 ALAMEDA … Alame… Alame… ALAME… Afri…    NA     9    NA    NA     Davis 
##  4 ALAMEDA … Alame… Alame… ALAME… All      62   157    18     0.395 Davis 
##  5 ALAMEDA … Alame… Alame… ALAME… Asian    37    97    14     0.381 Davis 
##  6 ALAMEDA … Alame… Alame… ALAME… Hisp…     4    11    NA     0.364 Davis 
##  7 ALAMEDA … Alame… Alame… ALAME… White    17    34     4     0.500 Davis 
##  8 ALAMEDA … Alame… Alame… ALAME… All      13    35     6     0.371 Davis 
##  9 ALAMEDA … Alame… Alame… ALAME… Asian    10    24     5     0.417 Davis 
## 10 ALAMEDA … Alame… Alame… ALAME… Hisp…    NA     5    NA    NA     Davis 
## # ... with 172 more rows, and 1 more variable: Year <dbl>
# Standardise the Irvine column names to SchoolID / County / School / Race.
# Columns are renamed by name, so a change in the CSV layout raises an error
# instead of silently relabelling the wrong column. `School` already has its
# final name and is left untouched.
Irvine_admit <- Irvine_admit %>%
  rename(
    SchoolID = Calculation1,
    County = `County/State/ Territory`,
    Race = `Uad Uc Ethn 6 Cat`
  )

Irvine_admit
## # A tibble: 146 x 11
##    SchoolID  City   County School Race    adm   app   enr AdmitStat Campus
##    <chr>     <chr>  <chr>  <chr>  <chr> <int> <int> <int>     <dbl> <chr> 
##  1 ALAMEDA … Alame… Alame… ALAME… All       5     8    NA     0.625 Irvine
##  2 ALAMEDA … Alame… Alame… ALAME… Asian     4     5    NA     0.800 Irvine
##  3 ALAMEDA … Alame… Alame… ALAME… Afri…    NA     6    NA    NA     Irvine
##  4 ALAMEDA … Alame… Alame… ALAME… All      54   128    15     0.422 Irvine
##  5 ALAMEDA … Alame… Alame… ALAME… Asian    41    95    14     0.432 Irvine
##  6 ALAMEDA … Alame… Alame… ALAME… White     5    18    NA     0.278 Irvine
##  7 ALAMEDA … Alame… Alame… ALAME… All      12    30     3     0.400 Irvine
##  8 ALAMEDA … Alame… Alame… ALAME… Asian    10    22     3     0.455 Irvine
##  9 ALBANY H… Albany Alame… ALBAN… All      24    68    NA     0.353 Irvine
## 10 ALBANY H… Albany Alame… ALBAN… Asian    20    42    NA     0.476 Irvine
## # ... with 136 more rows, and 1 more variable: Year <dbl>
# Standardise the Los Angeles column names to SchoolID / County / School /
# Race. Columns are renamed by name, so a change in the CSV layout raises an
# error instead of silently relabelling the wrong column. `School` already has
# its final name and is left untouched.
LA_all_admit <- LA_all_admit %>%
  rename(
    SchoolID = Calculation1,
    County = `County/State/ Territory`,
    Race = `Uad Uc Ethn 6 Cat`
  )

LA_all_admit
## # A tibble: 3,445 x 11
##    SchoolID  City   County School Race    adm   app   enr AdmitStat Campus
##    <chr>     <chr>  <chr>  <chr>  <chr> <int> <int> <int>     <dbl> <chr> 
##  1 A B MILL… Fonta… San B… A B M… Dome…    NA     9    NA   NA      LosAng
##  2 A B MILL… Fonta… San B… A B M… Hisp…     3    27    NA    0.111  LosAng
##  3 A B MILL… Fonta… San B… A B M… All       4    39    NA    0.103  LosAng
##  4 ABRAHAM … Los A… Los A… ABRAH… Asian     3    26     3    0.115  LosAng
##  5 ABRAHAM … Los A… Los A… ABRAH… Hisp…     3    10     3    0.300  LosAng
##  6 ABRAHAM … Los A… Los A… ABRAH… All       7    39     7    0.179  LosAng
##  7 ABRAHAM … San F… San F… ABRAH… White    NA     9    NA   NA      LosAng
##  8 ABRAHAM … San F… San F… ABRAH… Asian     9   101     6    0.0891 LosAng
##  9 ABRAHAM … San F… San F… ABRAH… Hisp…    NA     7    NA   NA      LosAng
## 10 ABRAHAM … San F… San F… ABRAH… All      11   125     7    0.0880 LosAng
## # ... with 3,435 more rows, and 1 more variable: Year <dbl>
# Keep only the Alameda County rows of the Los Angeles table.
LA_admit <- filter(LA_all_admit, County == "Alameda")

LA_admit
## # A tibble: 164 x 11
##    SchoolID  City   County School Race    adm   app   enr AdmitStat Campus
##    <chr>     <chr>  <chr>  <chr>  <chr> <int> <int> <int>     <dbl> <chr> 
##  1 ALAMEDA … Alame… Alame… ALAME… White    NA     5    NA    NA     LosAng
##  2 ALAMEDA … Alame… Alame… ALAME… Asian    NA     5    NA    NA     LosAng
##  3 ALAMEDA … Alame… Alame… ALAME… All      NA    13    NA    NA     LosAng
##  4 ALAMEDA … Alame… Alame… ALAME… White     3    21    NA     0.143 LosAng
##  5 ALAMEDA … Alame… Alame… ALAME… Asian    15    85     7     0.176 LosAng
##  6 ALAMEDA … Alame… Alame… ALAME… Afri…    NA     8    NA    NA     LosAng
##  7 ALAMEDA … Alame… Alame… ALAME… All      22   126    11     0.175 LosAng
##  8 ALAMEDA … Alame… Alame… ALAME… Asian    NA    17    NA    NA     LosAng
##  9 ALAMEDA … Alame… Alame… ALAME… Hisp…    NA     5    NA    NA     LosAng
## 10 ALAMEDA … Alame… Alame… ALAME… All      NA    25    NA    NA     LosAng
## # ... with 154 more rows, and 1 more variable: Year <dbl>
# Standardise the Merced column names to SchoolID / County / School / Race.
# Columns are renamed by name, so a change in the CSV layout raises an error
# instead of silently relabelling the wrong column. `School` already has its
# final name and is left untouched.
Merced_admit <- Merced_admit %>%
  rename(
    SchoolID = Calculation1,
    County = `County/State/ Territory`,
    Race = `Uad Uc Ethn 6 Cat`
  )

Merced_admit
## # A tibble: 119 x 11
##    SchoolID  City   County School Race    adm   app   enr AdmitStat Campus
##    <chr>     <chr>  <chr>  <chr>  <chr> <int> <int> <int>     <dbl> <chr> 
##  1 ALAMEDA … Alame… Alame… ALAME… All       5     5    NA     1.00  Merced
##  2 ALAMEDA … Alame… Alame… ALAME… All      41    48     6     0.854 Merced
##  3 ALAMEDA … Alame… Alame… ALAME… Asian    27    33     3     0.818 Merced
##  4 ALAMEDA … Alame… Alame… ALAME… White     9     9    NA     1.00  Merced
##  5 ALAMEDA … Alame… Alame… ALAME… All       7     9    NA     0.778 Merced
##  6 ALBANY H… Albany Alame… ALBAN… All      12    15    NA     0.800 Merced
##  7 ALBANY H… Albany Alame… ALBAN… Asian     9    10    NA     0.900 Merced
##  8 AMADOR V… Pleas… Alame… AMADO… All      45    55     6     0.818 Merced
##  9 AMADOR V… Pleas… Alame… AMADO… Asian    26    32    NA     0.812 Merced
## 10 AMADOR V… Pleas… Alame… AMADO… Hisp…     6     7    NA     0.857 Merced
## # ... with 109 more rows, and 1 more variable: Year <dbl>
# Standardise the Riverside column names to SchoolID / County / School / Race.
# Columns are renamed by name, so a change in the CSV layout raises an error
# instead of silently relabelling the wrong column. `School` already has its
# final name and is left untouched.
Riverside_admit <- Riverside_admit %>%
  rename(
    SchoolID = Calculation1,
    County = `County/State/ Territory`,
    Race = `Uad Uc Ethn 6 Cat`
  )

Riverside_admit
## # A tibble: 115 x 11
##    SchoolID  City   County School Race    adm   app   enr AdmitStat Campus
##    <chr>     <chr>  <chr>  <chr>  <chr> <int> <int> <int>     <dbl> <chr> 
##  1 ALAMEDA … Alame… Alame… ALAME… All      55    72    10     0.764 River…
##  2 ALAMEDA … Alame… Alame… ALAME… Asian    38    51     8     0.745 River…
##  3 ALAMEDA … Alame… Alame… ALAME… White     9    11    NA     0.818 River…
##  4 ALAMEDA … Alame… Alame… ALAME… All       5     8    NA     0.625 River…
##  5 ALAMEDA … Alame… Alame… ALAME… Asian     3     5    NA     0.600 River…
##  6 ALBANY H… Albany Alame… ALBAN… All      17    23     4     0.739 River…
##  7 ALBANY H… Albany Alame… ALBAN… Asian     9    12     3     0.750 River…
##  8 ALBANY H… Albany Alame… ALBAN… White     4     6    NA     0.667 River…
##  9 AMADOR V… Pleas… Alame… AMADO… All      76    88     3     0.864 River…
## 10 AMADOR V… Pleas… Alame… AMADO… Asian    49    55    NA     0.891 River…
## # ... with 105 more rows, and 1 more variable: Year <dbl>
# Standardise the San Diego column names to SchoolID / County / School / Race.
# Columns are renamed by name, so a change in the CSV layout raises an error
# instead of silently relabelling the wrong column. `School` already has its
# final name and is left untouched.
SanDiego_admit <- SanDiego_admit %>%
  rename(
    SchoolID = Calculation1,
    County = `County/State/ Territory`,
    Race = `Uad Uc Ethn 6 Cat`
  )

SanDiego_admit
## # A tibble: 143 x 11
##    SchoolID  City   County School Race    adm   app   enr AdmitStat Campus
##    <chr>     <chr>  <chr>  <chr>  <chr> <int> <int> <int>     <dbl> <chr> 
##  1 ALAMEDA … Alame… Alame… ALAME… All       5    12    NA     0.417 SanDi…
##  2 ALAMEDA … Alame… Alame… ALAME… Afri…    NA     7    NA    NA     SanDi…
##  3 ALAMEDA … Alame… Alame… ALAME… All      53   148     7     0.358 SanDi…
##  4 ALAMEDA … Alame… Alame… ALAME… Asian    33    95     4     0.347 SanDi…
##  5 ALAMEDA … Alame… Alame… ALAME… Dome…    NA     5    NA    NA     SanDi…
##  6 ALAMEDA … Alame… Alame… ALAME… Hisp…     3     8    NA     0.375 SanDi…
##  7 ALAMEDA … Alame… Alame… ALAME… White    14    29    NA     0.483 SanDi…
##  8 ALAMEDA … Alame… Alame… ALAME… All       5    28    NA     0.179 SanDi…
##  9 ALAMEDA … Alame… Alame… ALAME… Asian     4    21    NA     0.190 SanDi…
## 10 ALBANY H… Albany Alame… ALBAN… All      38    87     9     0.437 SanDi…
## # ... with 133 more rows, and 1 more variable: Year <dbl>
# Standardise the Santa Barbara column names to SchoolID / County / School /
# Race. Columns are renamed by name, so a change in the CSV layout raises an
# error instead of silently relabelling the wrong column. `School` already has
# its final name and is left untouched.
SantaBarb_admit <- SantaBarb_admit %>%
  rename(
    SchoolID = Calculation1,
    County = `County/State/ Territory`,
    Race = `Uad Uc Ethn 6 Cat`
  )

SantaBarb_admit
## # A tibble: 149 x 11
##    SchoolID  City   County School Race    adm   app   enr AdmitStat Campus
##    <chr>     <chr>  <chr>  <chr>  <chr> <int> <int> <int>     <dbl> <chr> 
##  1 ALAMEDA … Alame… Alame… ALAME… All       3    13    NA     0.231 Santa…
##  2 ALAMEDA … Alame… Alame… ALAME… White    NA     6    NA    NA     Santa…
##  3 ALAMEDA … Alame… Alame… ALAME… Afri…    NA     8    NA    NA     Santa…
##  4 ALAMEDA … Alame… Alame… ALAME… All      47   144     9     0.326 Santa…
##  5 ALAMEDA … Alame… Alame… ALAME… Asian    25    84     4     0.298 Santa…
##  6 ALAMEDA … Alame… Alame… ALAME… Dome…    NA     6    NA    NA     Santa…
##  7 ALAMEDA … Alame… Alame… ALAME… Hisp…     4     8    NA     0.500 Santa…
##  8 ALAMEDA … Alame… Alame… ALAME… White    13    34     5     0.382 Santa…
##  9 ALAMEDA … Alame… Alame… ALAME… All       5    21    NA     0.238 Santa…
## 10 ALAMEDA … Alame… Alame… ALAME… Asian     3    14    NA     0.214 Santa…
## # ... with 139 more rows, and 1 more variable: Year <dbl>
# Standardise the Santa Cruz column names to SchoolID / County / School / Race.
# Columns are renamed by name, so a change in the CSV layout raises an error
# instead of silently relabelling the wrong column. `School` already has its
# final name and is left untouched.
SantaCruz_admit <- SantaCruz_admit %>%
  rename(
    SchoolID = Calculation1,
    County = `County/State/ Territory`,
    Race = `Uad Uc Ethn 6 Cat`
  )

SantaCruz_admit
## # A tibble: 167 x 11
##    SchoolID  City   County School Race    adm   app   enr AdmitStat Campus
##    <chr>     <chr>  <chr>  <chr>  <chr> <int> <int> <int>     <dbl> <chr> 
##  1 ALAMEDA … Alame… Alame… ALAME… All       9    18    NA     0.500 Santa…
##  2 ALAMEDA … Alame… Alame… ALAME… Asian    NA     6    NA    NA     Santa…
##  3 ALAMEDA … Alame… Alame… ALAME… Afri…    NA     6    NA    NA     Santa…
##  4 ALAMEDA … Alame… Alame… ALAME… All      74   133    18     0.556 Santa…
##  5 ALAMEDA … Alame… Alame… ALAME… Asian    43    80     9     0.538 Santa…
##  6 ALAMEDA … Alame… Alame… ALAME… Hisp…     4     9    NA     0.444 Santa…
##  7 ALAMEDA … Alame… Alame… ALAME… White    22    32     8     0.688 Santa…
##  8 ALAMEDA … Alame… Alame… ALAME… All      13    27    NA     0.481 Santa…
##  9 ALAMEDA … Alame… Alame… ALAME… Asian     9    17    NA     0.529 Santa…
## 10 ALBANY H… Albany Alame… ALBAN… All      69   106     9     0.651 Santa…
## # ... with 157 more rows, and 1 more variable: Year <dbl>
  # Scatter of AdmitStat per School for the All_campus data, coloured by
  # Race and split into one panel per City (two rows of panels).
  ggplot(All_campus, aes(x = School, y = AdmitStat, colour = Race)) +
    geom_point() +
    facet_wrap(~City, nrow = 2) +
    labs(x = "2017", y = "All campus admissions")
## Warning: Removed 13 rows containing missing values (geom_point).

 # Scatter of AdmitStat per School for the Berkeley data, coloured by
 # Race and split into one panel per City (two rows of panels).
 ggplot(Berkeley_admit, aes(x = School, y = AdmitStat, colour = Race)) +
   geom_point() +
   facet_wrap(~City, nrow = 2) +
   labs(x = "2017", y = "Berkeley admissions")
## Warning: Removed 81 rows containing missing values (geom_point).

 # Scatter of AdmitStat per School for the Davis data, coloured by
 # Race and split into one panel per City (two rows of panels).
 ggplot(Davis_admit, aes(x = School, y = AdmitStat, colour = Race)) +
   geom_point() +
   facet_wrap(~City, nrow = 2) +
   labs(x = "2017", y = "Davis admissions")
## Warning: Removed 45 rows containing missing values (geom_point).

 # Scatter of AdmitStat per School for the Irvine data, coloured by
 # Race and split into one panel per City (two rows of panels).
 ggplot(Irvine_admit, aes(x = School, y = AdmitStat, colour = Race)) +
   geom_point() +
   facet_wrap(~City, nrow = 2) +
   labs(x = "2017", y = "Irvine admissions")
## Warning: Removed 49 rows containing missing values (geom_point).

 # Scatter of AdmitStat per School for the LA data, coloured by
 # Race and split into one panel per City (two rows of panels).
 ggplot(LA_admit, aes(x = School, y = AdmitStat, colour = Race)) +
   geom_point() +
   facet_wrap(~City, nrow = 2) +
   labs(x = "2017", y = "Los Angeles admissions")
## Warning: Removed 95 rows containing missing values (geom_point).

 # Scatter of AdmitStat per School for the Merced data, coloured by
 # Race and split into one panel per City (two rows of panels).
 ggplot(Merced_admit, aes(x = School, y = AdmitStat, colour = Race)) +
   geom_point() +
   facet_wrap(~City, nrow = 2) +
   labs(x = "2017", y = "Merced admissions")
## Warning: Removed 7 rows containing missing values (geom_point).

 # Scatter of AdmitStat per School for the Riverside data, coloured by
 # Race and split into one panel per City (two rows of panels).
 ggplot(Riverside_admit, aes(x = School, y = AdmitStat, colour = Race)) +
   geom_point() +
   facet_wrap(~City, nrow = 2) +
   labs(x = "2017", y = "Riverside admissions")
## Warning: Removed 18 rows containing missing values (geom_point).

 # Scatter of AdmitStat per School for the San Diego data, coloured by
 # Race and split into one panel per City (two rows of panels).
 ggplot(SanDiego_admit, aes(x = School, y = AdmitStat, colour = Race)) +
   geom_point() +
   facet_wrap(~City, nrow = 2) +
   labs(x = "2017", y = "San Diego admissions")
## Warning: Removed 49 rows containing missing values (geom_point).

 # Scatter of AdmitStat per School for the Santa Barbara data, coloured by
 # Race and split into one panel per City (two rows of panels).
 # The y-axis label spells the campus name out in full, matching the
 # "Los Angeles" / "San Diego" labels on the sibling plots, instead of
 # showing the truncated variable-name fragment "SantaBarb".
 ggplot(data = SantaBarb_admit) +
   geom_point(mapping = aes(x = School, y = AdmitStat, color = Race)) +
   facet_wrap(~ City, nrow = 2) +
   labs(y = "Santa Barbara admissions", x = "2017")
## Warning: Removed 48 rows containing missing values (geom_point).

 # Scatter of AdmitStat per School for the Santa Cruz data, coloured by
 # Race and split into one panel per City (two rows of panels).
 # The y-axis label writes the campus name as two words, matching the
 # "San Diego" / "Los Angeles" labels on the sibling plots.
 ggplot(data = SantaCruz_admit) +
   geom_point(mapping = aes(x = School, y = AdmitStat, color = Race)) +
   facet_wrap(~ City, nrow = 2) +
   labs(y = "Santa Cruz admissions", x = "2017")
## Warning: Removed 26 rows containing missing values (geom_point).

# Regression trees ----
# One rpart tree per data set, each predicting AdmitStat from City and Race.
# Every fit is kept under its own name and drawn via partykit's as.party().

tree_modAll <- rpart(
  formula = AdmitStat ~ City + Race,
  data = All_campus
)
tree_modAll %>% as.party() %>% plot()

tree_modBerkeley <- rpart(
  formula = AdmitStat ~ City + Race,
  data = Berkeley_admit
)
tree_modBerkeley %>% as.party() %>% plot()

tree_modDavis <- rpart(
  formula = AdmitStat ~ City + Race,
  data = Davis_admit
)
tree_modDavis %>% as.party() %>% plot()

tree_modIrvine <- rpart(
  formula = AdmitStat ~ City + Race,
  data = Irvine_admit
)
tree_modIrvine %>% as.party() %>% plot()

tree_modLA <- rpart(
  formula = AdmitStat ~ City + Race,
  data = LA_admit
)
tree_modLA %>% as.party() %>% plot()

tree_modMerced <- rpart(
  formula = AdmitStat ~ City + Race,
  data = Merced_admit
)
tree_modMerced %>% as.party() %>% plot()

tree_modRiverside <- rpart(
  formula = AdmitStat ~ City + Race,
  data = Riverside_admit
)
tree_modRiverside %>% as.party() %>% plot()

tree_modSanDiego <- rpart(
  formula = AdmitStat ~ City + Race,
  data = SanDiego_admit
)
tree_modSanDiego %>% as.party() %>% plot()

tree_modSantaBarb <- rpart(
  formula = AdmitStat ~ City + Race,
  data = SantaBarb_admit
)
tree_modSantaBarb %>% as.party() %>% plot()

tree_modSantaCruz <- rpart(
  formula = AdmitStat ~ City + Race,
  data = SantaCruz_admit
)
tree_modSantaCruz %>% as.party() %>% plot()

# Linear model of AdmitStat on City and Race for the All_campus data;
# tidy() prints one row per fitted coefficient.
AdmittanceAll <- lm(
  formula = AdmitStat ~ City + Race,
  data = All_campus
)
AdmittanceAll %>% tidy()
##                    term    estimate  std.error  statistic      p.value
## 1           (Intercept)  0.58908165 0.03765383 15.6446674 6.966630e-35
## 2            CityAlbany -0.04364858 0.06180058 -0.7062811 4.809690e-01
## 3          CityBerkeley  0.03131487 0.05438830  0.5757648 5.655267e-01
## 4     CityCastro Valley -0.13008122 0.06180058 -2.1048544 3.675684e-02
## 5            CityDublin -0.05915651 0.05807523 -1.0186186 3.098147e-01
## 6        CityEmeryville -0.43761167 0.12665553 -3.4551328 6.927625e-04
## 7           CityFremont -0.06134431 0.03823063 -1.6045855 1.104196e-01
## 8           CityHayward -0.04176112 0.04077306 -1.0242332 3.071635e-01
## 9         CityLivermore -0.12347166 0.04679446 -2.6385958 9.089147e-03
## 10           CityNewark -0.20051806 0.06765016 -2.9640440 3.467069e-03
## 11          CityOakland -0.09221864 0.03304373 -2.7908062 5.851823e-03
## 12         CityPiedmont -0.07456624 0.06184689 -1.2056587 2.296051e-01
## 13       CityPleasanton -0.09781594 0.04862928 -2.0114621 4.583794e-02
## 14      CitySan Leandro -0.18882813 0.06180058 -3.0554427 2.605618e-03
## 15      CitySan Lorenzo -0.05534944 0.04545184 -1.2177603 2.249830e-01
## 16       CityUnion City -0.16070190 0.06180058 -2.6003300 1.012369e-02
## 17              RaceAll  0.12125729 0.03072684  3.9462993 1.154763e-04
## 18            RaceAsian  0.23301564 0.03338727  6.9791767 6.168482e-11
## 19 RaceDomestic Unknown  0.33997418 0.07810254  4.3529211 2.299817e-05
## 20 RaceHispanic/ Latino  0.06605839 0.03146694  2.0992949 3.725052e-02
## 21  RaceInter- national  0.36219765 0.07861341  4.6073263 7.913258e-06
## 22            RaceWhite  0.13474682 0.03661642  3.6799561 3.118570e-04
# Linear model of AdmitStat on City and Race for the Berkeley data;
# tidy() prints one row per fitted coefficient.
AdmittanceBerk <- lm(
  formula = AdmitStat ~ City + Race,
  data = Berkeley_admit
)
AdmittanceBerk %>% tidy()
##                    term    estimate  std.error  statistic      p.value
## 1           (Intercept)  0.30170196 0.04699590  6.4197501 1.020036e-08
## 2            CityAlbany -0.07676263 0.05302535 -1.4476590 1.517719e-01
## 3          CityBerkeley -0.09253793 0.04544192 -2.0364002 4.514851e-02
## 4     CityCastro Valley -0.17143770 0.05399522 -3.1750532 2.154705e-03
## 5            CityDublin -0.02424574 0.05525148 -0.4388251 6.620172e-01
## 6           CityFremont -0.10142922 0.03631176 -2.7932884 6.579716e-03
## 7           CityHayward -0.04666524 0.03892313 -1.1989077 2.342405e-01
## 8         CityLivermore -0.16983582 0.04772906 -3.5583318 6.429013e-04
## 9            CityNewark -0.20224649 0.08437664 -2.3969489 1.895709e-02
## 10          CityOakland -0.05127682 0.03183892 -1.6105074 1.113793e-01
## 11         CityPiedmont -0.11718747 0.05302535 -2.2100273 3.007367e-02
## 12       CityPleasanton -0.07416052 0.03915978 -1.8937931 6.200849e-02
## 13      CitySan Leandro -0.10454159 0.06240456 -1.6752235 9.794717e-02
## 14      CitySan Lorenzo -0.14728550 0.04815752 -3.0584110 3.060024e-03
## 15       CityUnion City -0.09270753 0.05302535 -1.7483625 8.438693e-02
## 16              RaceAll -0.02253240 0.03915223 -0.5755074 5.666263e-01
## 17            RaceAsian  0.01026809 0.04122732  0.2490605 8.039772e-01
## 18 RaceDomestic Unknown  0.07631029 0.05876042  1.2986681 1.979337e-01
## 19 RaceHispanic/ Latino -0.01022204 0.04329386 -0.2361084 8.139755e-01
## 20            RaceWhite  0.02830041 0.04679627  0.6047578 5.471180e-01
# Linear model of AdmitStat on City and Race for the Davis data;
# tidy() prints one row per fitted coefficient.
AdmittanceDavis <- lm(
  formula = AdmitStat ~ City + Race,
  data = Davis_admit
)
AdmittanceDavis %>% tidy()
##                    term     estimate  std.error  statistic      p.value
## 1           (Intercept)  0.436100047 0.06126792  7.1179183 9.792852e-11
## 2            CityAlbany  0.012192978 0.06752361  0.1805736 8.570174e-01
## 3          CityBerkeley  0.005691782 0.05662403  0.1005189 9.201059e-01
## 4     CityCastro Valley -0.131550946 0.06317815 -2.0822222 3.952088e-02
## 5            CityDublin -0.071224657 0.06991099 -1.0187906 3.104225e-01
## 6           CityFremont -0.128391850 0.04727786 -2.7156863 7.625403e-03
## 7           CityHayward -0.023336212 0.04568820 -0.5107711 6.104820e-01
## 8         CityLivermore -0.103765112 0.05367040 -1.9333770 5.562660e-02
## 9            CityNewark -0.129171703 0.06752361 -1.9129857 5.821507e-02
## 10          CityOakland -0.014355825 0.04076334 -0.3521749 7.253461e-01
## 11         CityPiedmont -0.139641731 0.07528275 -1.8548969 6.615017e-02
## 12       CityPleasanton -0.099163436 0.05290598 -1.8743331 6.340014e-02
## 13      CitySan Leandro -0.137009083 0.07554368 -1.8136405 7.231827e-02
## 14      CitySan Lorenzo -0.187802405 0.05400754 -3.4773370 7.137036e-04
## 15       CityUnion City -0.165657404 0.06752361 -2.4533256 1.564156e-02
## 16              RaceAll -0.026118890 0.05166091 -0.5055832 6.141086e-01
## 17            RaceAsian  0.019387707 0.05333977  0.3634756 7.169112e-01
## 18 RaceDomestic Unknown  0.188748337 0.08918513  2.1163655 3.645159e-02
## 19 RaceHispanic/ Latino -0.028420496 0.05363832 -0.5298543 5.972257e-01
## 20  RaceInter- national  0.277677596 0.09934186  2.7951721 6.072113e-03
## 21            RaceWhite  0.028666470 0.05851687  0.4898839 6.251413e-01
# Linear model of AdmitStat on City and Race for the Irvine data.
AdmittanceIrvine <- lm(AdmitStat ~ City + Race, data = Irvine_admit)
# Take a look at the features and coefficients of the Irvine fit.
# This call previously passed AdmittanceBerk (a copy-paste slip), so the
# coefficient table captured below is the Berkeley table; it is stale until
# the document is re-run.
tidy(AdmittanceIrvine)
##                    term    estimate  std.error  statistic      p.value
## 1           (Intercept)  0.30170196 0.04699590  6.4197501 1.020036e-08
## 2            CityAlbany -0.07676263 0.05302535 -1.4476590 1.517719e-01
## 3          CityBerkeley -0.09253793 0.04544192 -2.0364002 4.514851e-02
## 4     CityCastro Valley -0.17143770 0.05399522 -3.1750532 2.154705e-03
## 5            CityDublin -0.02424574 0.05525148 -0.4388251 6.620172e-01
## 6           CityFremont -0.10142922 0.03631176 -2.7932884 6.579716e-03
## 7           CityHayward -0.04666524 0.03892313 -1.1989077 2.342405e-01
## 8         CityLivermore -0.16983582 0.04772906 -3.5583318 6.429013e-04
## 9            CityNewark -0.20224649 0.08437664 -2.3969489 1.895709e-02
## 10          CityOakland -0.05127682 0.03183892 -1.6105074 1.113793e-01
## 11         CityPiedmont -0.11718747 0.05302535 -2.2100273 3.007367e-02
## 12       CityPleasanton -0.07416052 0.03915978 -1.8937931 6.200849e-02
## 13      CitySan Leandro -0.10454159 0.06240456 -1.6752235 9.794717e-02
## 14      CitySan Lorenzo -0.14728550 0.04815752 -3.0584110 3.060024e-03
## 15       CityUnion City -0.09270753 0.05302535 -1.7483625 8.438693e-02
## 16              RaceAll -0.02253240 0.03915223 -0.5755074 5.666263e-01
## 17            RaceAsian  0.01026809 0.04122732  0.2490605 8.039772e-01
## 18 RaceDomestic Unknown  0.07631029 0.05876042  1.2986681 1.979337e-01
## 19 RaceHispanic/ Latino -0.01022204 0.04329386 -0.2361084 8.139755e-01
## 20            RaceWhite  0.02830041 0.04679627  0.6047578 5.471180e-01
# Linear model of AdmitStat on City and Race for the LA data;
# tidy() prints one row per fitted coefficient.
AdmittanceLA <- lm(
  formula = AdmitStat ~ City + Race,
  data = LA_admit
)
AdmittanceLA %>% tidy()
##                    term     estimate  std.error   statistic    p.value
## 1           (Intercept)  0.155338019 0.08288356  1.87417168 0.06687533
## 2            CityAlbany  0.013148437 0.05496244  0.23922587 0.81192826
## 3          CityBerkeley -0.020939401 0.05496244 -0.38097658 0.70486747
## 4     CityCastro Valley -0.006319712 0.05271924 -0.11987488 0.90507242
## 5            CityDublin  0.016699906 0.05753768  0.29024295 0.77285519
## 6           CityFremont -0.004962854 0.04285261 -0.11581220 0.90827472
## 7           CityHayward  0.042436924 0.04875717  0.87037300 0.38834068
## 8         CityLivermore  0.016176491 0.04835639  0.33452645 0.73941007
## 9            CityNewark -0.061777550 0.08092268 -0.76341449 0.44887641
## 10          CityOakland  0.058947695 0.04135031  1.42556823 0.16033256
## 11         CityPiedmont  0.001397573 0.05496244  0.02542778 0.97981702
## 12       CityPleasanton -0.012686211 0.04828042 -0.26276101 0.79383646
## 13      CitySan Leandro  0.037066677 0.06264863  0.59165981 0.55679844
## 14      CitySan Lorenzo -0.050225258 0.05527657 -0.90861749 0.36799954
## 15       CityUnion City  0.009740954 0.06264863  0.15548551 0.87707735
## 16              RaceAll -0.007846184 0.07511015 -0.10446236 0.91722887
## 17            RaceAsian -0.001350464 0.07612039 -0.01774115 0.98591739
## 18 RaceDomestic Unknown  0.117587065 0.08843929  1.32957945 0.18981372
## 19 RaceHispanic/ Latino  0.019328381 0.09049700  0.21358034 0.83176054
## 20            RaceWhite  0.007268717 0.07794905  0.09324960 0.92608552
# Linear model of AdmitStat on City and Race for the Merced data;
# tidy() prints one row per fitted coefficient.
AdmittanceMerced <- lm(
  formula = AdmitStat ~ City + Race,
  data = Merced_admit
)
AdmittanceMerced %>% tidy()
##                    term     estimate  std.error   statistic      p.value
## 1           (Intercept)  0.928250569 0.07768897 11.94829315 2.038593e-20
## 2            CityAlbany -0.040288380 0.09548715 -0.42192464 6.740643e-01
## 3          CityBerkeley -0.018618669 0.07117581 -0.26158702 7.942246e-01
## 4     CityCastro Valley -0.061636086 0.08248297 -0.74725830 4.568133e-01
## 5            CityDublin -0.196820135 0.08248297 -2.38619111 1.907240e-02
## 6           CityFremont -0.011046024 0.05905941 -0.18703242 8.520471e-01
## 7           CityHayward -0.168428324 0.05755990 -2.92614018 4.322941e-03
## 8         CityLivermore -0.063570795 0.06954650 -0.91407608 3.630663e-01
## 9            CityNewark -0.181955047 0.09548715 -1.90554492 5.983197e-02
## 10          CityOakland -0.185199864 0.05079990 -3.64567384 4.414968e-04
## 11         CityPiedmont -0.038262102 0.08248297 -0.46387880 6.438303e-01
## 12       CityPleasanton -0.027939846 0.06404660 -0.43624246 6.636828e-01
## 13      CitySan Leandro -0.152972444 0.08278217 -1.84789117 6.783113e-02
## 14      CitySan Lorenzo -0.041431090 0.06229190 -0.66511196 5.076428e-01
## 15       CityUnion City -0.175293873 0.08278217 -2.11753170 3.691214e-02
## 16              RaceAll -0.072495955 0.06395157 -1.13360715 2.599048e-01
## 17            RaceAsian -0.003428424 0.06683122 -0.05129973 9.591979e-01
## 18 RaceDomestic Unknown  0.082795455 0.11182437  0.74040620 4.609390e-01
## 19 RaceHispanic/ Latino -0.128382218 0.06554627 -1.95865011 5.318234e-02
## 20            RaceWhite -0.004755309 0.07817703 -0.06082744 9.516286e-01
# Linear model of AdmitStat on City and Race for the Riverside data;
# tidy() prints one row per fitted coefficient.
AdmittanceRiverside <- lm(
  formula = AdmitStat ~ City + Race,
  data = Riverside_admit
)
AdmittanceRiverside %>% tidy()
##                    term    estimate  std.error  statistic      p.value
## 1           (Intercept)  0.68614363 0.09055040  7.5774775 6.594168e-11
## 2            CityAlbany -0.02395866 0.09281323 -0.2581384 7.969888e-01
## 3          CityBerkeley -0.14328209 0.08063243 -1.7769785 7.952109e-02
## 4     CityCastro Valley -0.05328411 0.08627350 -0.6176185 5.386492e-01
## 5            CityDublin  0.08427923 0.09281323  0.9080519 3.666841e-01
## 6           CityFremont -0.01408112 0.06174472 -0.2280538 8.202090e-01
## 7           CityHayward -0.16686131 0.06530323 -2.5551771 1.258479e-02
## 8         CityLivermore -0.08058540 0.07515838 -1.0722078 2.869757e-01
## 9            CityNewark -0.16160154 0.10995345 -1.4697269 1.457100e-01
## 10          CityOakland -0.30546094 0.06261875 -4.8781069 5.646804e-06
## 11         CityPiedmont  0.13139189 0.09281323  1.4156590 1.609075e-01
## 12       CityPleasanton  0.04670401 0.07061709  0.6613698 5.103494e-01
## 13      CitySan Leandro -0.32898717 0.10757104 -3.0583246 3.060810e-03
## 14      CitySan Lorenzo -0.30118592 0.07626552 -3.9491753 1.722958e-04
## 15       CityUnion City -0.23461552 0.09352685 -2.5085366 1.422558e-02
## 16              RaceAll  0.02292727 0.07653816  0.2995534 7.653249e-01
## 17            RaceAsian  0.09609314 0.07854389  1.2234324 2.248973e-01
## 18 RaceDomestic Unknown  0.30767578 0.11138828  2.7621917 7.176565e-03
## 19 RaceHispanic/ Latino -0.02756700 0.07958587 -0.3463805 7.300015e-01
## 20            RaceWhite  0.05022177 0.08393068  0.5983720 5.513479e-01
# Linear model of AdmitStat on City and Race for the San Diego data;
# tidy() prints one row per fitted coefficient.
AdmittanceSanDiego <- lm(
  formula = AdmitStat ~ City + Race,
  data = SanDiego_admit
)
AdmittanceSanDiego %>% tidy()
##                    term     estimate  std.error   statistic      p.value
## 1           (Intercept)  0.348812843 0.08118680  4.29642324 5.207291e-05
## 2            CityAlbany  0.071694117 0.07813186  0.91760416 3.618072e-01
## 3          CityBerkeley -0.097737566 0.06681538 -1.46280047 1.477564e-01
## 4     CityCastro Valley -0.088278127 0.07042032 -1.25358880 2.139376e-01
## 5            CityDublin -0.155540428 0.07813186 -1.99074274 5.020209e-02
## 6           CityFremont -0.082676158 0.05330210 -1.55108622 1.251476e-01
## 7           CityHayward  0.002984948 0.05184676  0.05757251 9.542443e-01
## 8         CityLivermore -0.134591206 0.06206173 -2.16866661 3.332198e-02
## 9            CityNewark -0.108630720 0.09117640 -1.19143468 2.372903e-01
## 10          CityOakland -0.023114482 0.04769992 -0.48458115 6.294048e-01
## 11         CityPiedmont -0.121263043 0.07813186 -1.55203072 1.249215e-01
## 12       CityPleasanton -0.082466339 0.06058815 -1.36109681 1.776137e-01
## 13      CitySan Leandro -0.088017999 0.09117640 -0.96535949 3.375091e-01
## 14      CitySan Lorenzo  0.001177797 0.06147418  0.01915922 9.847657e-01
## 15       CityUnion City -0.041678374 0.07813186 -0.53343637 5.953302e-01
## 16              RaceAll  0.023805554 0.07288298  0.32662706 7.448720e-01
## 17            RaceAsian  0.070189174 0.07368340  0.95257787 3.439052e-01
## 18 RaceDomestic Unknown  0.206172199 0.11316300  1.82190471 7.251028e-02
## 19 RaceHispanic/ Latino  0.018689222 0.08032452  0.23267145 8.166589e-01
## 20            RaceWhite  0.062932667 0.07979894  0.78864038 4.328415e-01
# Linear model of AdmitStat on City and Race for the Santa Barbara data;
# tidy() prints one row per fitted coefficient.
AdmittanceSantaBarb <- lm(
  formula = AdmitStat ~ City + Race,
  data = SantaBarb_admit
)
AdmittanceSantaBarb %>% tidy()
##                    term     estimate  std.error  statistic      p.value
## 1           (Intercept)  0.457249368 0.06593913  6.9344159 9.023858e-10
## 2            CityAlbany  0.049374434 0.07472863  0.6607164 5.106690e-01
## 3          CityBerkeley -0.056636379 0.06323152 -0.8956985 3.730679e-01
## 4     CityCastro Valley -0.115004109 0.06323152 -1.8187782 7.264117e-02
## 5            CityDublin -0.079647493 0.06848780 -1.1629443 2.482668e-01
## 6           CityFremont -0.022778571 0.04724560 -0.4821310 6.310131e-01
## 7           CityHayward -0.024272041 0.05048481 -0.4807791 6.319696e-01
## 8         CityLivermore -0.005568372 0.05220010 -0.1066736 9.153117e-01
## 9            CityNewark -0.025882784 0.08807543 -0.2938706 7.696084e-01
## 10          CityOakland -0.073082757 0.04735976 -1.5431405 1.266947e-01
## 11         CityPiedmont -0.066509410 0.07472863 -0.8900124 3.760950e-01
## 12       CityPleasanton -0.029968514 0.05277722 -0.5678305 5.717207e-01
## 13      CitySan Leandro -0.051428940 0.07525654 -0.6833817 4.963159e-01
## 14      CitySan Lorenzo -0.075843231 0.06749297 -1.1237204 2.644510e-01
## 15       CityUnion City -0.048391514 0.07472863 -0.6475632 5.190987e-01
## 16              RaceAll -0.111377551 0.05755343 -1.9352027 5.645539e-02
## 17            RaceAsian -0.055059321 0.05814849 -0.9468745 3.465190e-01
## 18 RaceDomestic Unknown  0.068667015 0.08324211  0.8249072 4.118470e-01
## 19 RaceHispanic/ Latino -0.113524411 0.06428791 -1.7658751 8.118495e-02
## 20            RaceWhite -0.070410663 0.06135625 -1.1475711 2.545239e-01
# Linear model of AdmitStat on City and Race for the Santa Cruz data;
# tidy() prints one row per fitted coefficient.
AdmittanceSantaCruz <- lm(
  formula = AdmitStat ~ City + Race,
  data = SantaCruz_admit
)
AdmittanceSantaCruz %>% tidy()
##                    term     estimate  std.error  statistic      p.value
## 1           (Intercept)  0.589985725 0.09165207  6.4372333 2.615254e-09
## 2            CityAlbany -0.011885319 0.10133643 -0.1172857 9.068297e-01
## 3          CityBerkeley  0.139507557 0.08482305  1.6446892 1.026511e-01
## 4     CityCastro Valley -0.149847332 0.10133643 -1.4787113 1.418377e-01
## 5            CityDublin -0.089534350 0.11299597 -0.7923676 4.297100e-01
## 6           CityFremont -0.095179847 0.06823796 -1.3948225 1.656459e-01
## 7           CityHayward -0.101494194 0.07113650 -1.4267526 1.562479e-01
## 8         CityLivermore -0.115521248 0.08049385 -1.4351562 1.538439e-01
## 9            CityNewark -0.232994075 0.10133643 -2.2992133 2.322204e-02
## 10          CityOakland -0.084864813 0.05990685 -1.4166129 1.591869e-01
## 11         CityPiedmont -0.138071141 0.11299597 -1.2219121 2.241358e-01
## 12       CityPleasanton -0.093757794 0.08172427 -1.1472454 2.535624e-01
## 13      CitySan Leandro -0.116385123 0.11330381 -1.0271951 3.063947e-01
## 14      CitySan Lorenzo -0.049459556 0.08101657 -0.6104869 5.426938e-01
## 15       CityUnion City -0.156494807 0.10133643 -1.5443094 1.251465e-01
## 16              RaceAll -0.001426157 0.07712750 -0.0184909 9.852779e-01
## 17            RaceAsian  0.104907978 0.08026846  1.3069639 1.937240e-01
## 18 RaceDomestic Unknown  0.230497613 0.13312320  1.7314609 8.593958e-02
## 19 RaceHispanic/ Latino -0.064958715 0.07909208 -0.8213049 4.131013e-01
## 20  RaceInter- national  0.505194122 0.15026000  3.3621332 1.038191e-03
## 21            RaceWhite  0.119031531 0.08579450  1.3874027 1.678902e-01